Computer Vision Project (Module 2) submitted for PGP-AIML Great Learning on 29-May-2022
DOMAIN: Entertainment
CONTEXT: Company X owns a movie application and repository which caters movie streaming to millions of users on a subscription basis. The company wants to automate the process of providing cast and crew information for each scene in a movie, so that when a user pauses the movie and clicks the cast information button, the app shows details of the actors in the scene. The company has in-house computer vision and multimedia experts who need to detect faces in screenshots from movie scenes.
The data labelling is already done.
• DATA DESCRIPTION: The dataset comprises images and the corresponding face mask for each human face.
• PROJECT OBJECTIVE: To build a face detection system
1.Import and Understand the data [7 Marks]
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import collections
import seaborn as sns
import pandas as pd
from sklearn.metrics import mean_squared_error, confusion_matrix, classification_report, roc_curve, precision_recall_curve, roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Activation, LeakyReLU
from keras import optimizers
# Import label encoder
from sklearn import preprocessing
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.layers import Dropout, InputLayer, BatchNormalization
from tensorflow.keras import layers
import cv2
import glob
import os
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
A. Import and read ‘images.npy. [1 Mark]
# .npy is NumPy's native array file extension; numpy.load reads it back.
# allow_pickle=True is required because the second column of each row holds
# Python objects (lists of annotation dicts), not plain numbers.
# NOTE(review): allow_pickle can execute arbitrary code while loading an
# untrusted file — only load .npy files from a trusted source.
image_array = np.load('images.npy', allow_pickle=True)
# Peek at the raw pixel array of the first image (row 0, column 0).
image_array[0][0]
array([[[42, 37, 34],
[56, 51, 48],
[71, 66, 63],
...,
[23, 33, 34],
[26, 36, 37],
[28, 38, 39]],
[[40, 35, 32],
[51, 46, 43],
[64, 59, 56],
...,
[27, 36, 35],
[24, 33, 32],
[26, 35, 34]],
[[43, 38, 35],
[51, 46, 43],
[61, 56, 53],
...,
[28, 30, 27],
[33, 35, 32],
[35, 37, 34]],
...,
[[56, 47, 40],
[57, 48, 41],
[61, 52, 45],
...,
[67, 48, 42],
[55, 35, 28],
[60, 40, 33]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[60, 40, 33],
[54, 34, 27]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[70, 50, 43],
[64, 44, 37]]], dtype=uint8)
# Display a few sample frames; column 0 of each row holds the raw image,
# column 1 holds its face annotations. Grid lines are disabled for clarity.
plt.imshow(image_array[0][0])
plt.grid(False)
plt.imshow(image_array[11][0])
plt.grid(False)
plt.imshow(image_array[255][0])
plt.grid(False)
image_array.shape
(409, 2)
Observation: The image array has 409 images and the array is 2 dimensional
image_array[0][1]
[{'label': ['Face'],
'notes': '',
'points': [{'x': 0.08615384615384615, 'y': 0.3063063063063063},
{'x': 0.1723076923076923, 'y': 0.45345345345345345}],
'imageWidth': 650,
'imageHeight': 333},
{'label': ['Face'],
'notes': '',
'points': [{'x': 0.583076923076923, 'y': 0.2912912912912913},
{'x': 0.6584615384615384, 'y': 0.46846846846846846}],
'imageWidth': 650,
'imageHeight': 333}]
# Walk the annotation list of one sample image (index 239) and dump every
# field of each face record: the full dict, its label, the two normalised
# corner points, and the original image dimensions.
face_list = image_array[239][1]
for face_arr in face_list:
    print('--' * 26)  # visual separator between face records
    print(face_arr)
    print(face_arr.get('label'))
    points = face_arr.get('points')
    print(points)
    print('length of points', len(points))
    for point in points:
        print('point details', point)
    imageWidth = face_arr.get('imageWidth')
    imageHeight = face_arr.get('imageHeight')
    print('imageWidth', imageWidth)
    print('imageHeight', imageHeight)
----------------------------------------------------
{'label': ['Face'], 'notes': '', 'points': [{'x': 0.25833333333333336, 'y': 0.25}, {'x': 0.5479166666666667, 'y': 0.6611111111111111}], 'imageWidth': 480, 'imageHeight': 360}
['Face']
[{'x': 0.25833333333333336, 'y': 0.25}, {'x': 0.5479166666666667, 'y': 0.6611111111111111}]
length of points 2
point details {'x': 0.25833333333333336, 'y': 0.25}
point details {'x': 0.5479166666666667, 'y': 0.6611111111111111}
imageWidth 480
imageHeight 360
Observation: The first Dimension has the image details in array and second dimension has meta data about face mask information
## Method to display random images
def display_random_images(no_of_images,im_array,title_name):## accept number of random images as input, data frame and name of title column
randomlist = random.sample(range(0, im_array.shape[0]), no_of_images)
print(randomlist)
w = 10
h = 10
fig = plt.figure(figsize=(15, 15))
columns = 3
## Logic to find the number of grids to represent
num_rows=no_of_images//columns # Quotient will be stored in num_rows
reminder=no_of_images%columns # Reminder will be stored in Reminder
if(reminder==0):
rows=num_rows
else:
rows=num_rows+1
#rows =2
x=0
for i in randomlist:
x=x+1
fig.add_subplot(rows, columns, x)
fig.add_subplot(rows, columns, x)
#species_name=data.iloc[i][title_name]
plt.title(i)
#img=seedling_df.iloc[i]['actual_img']
plt.grid(False)
plt.imshow(image_array[i][0])
plt.show()
import random
display_random_images(9,image_array,'images')
[389, 233, 189, 206, 176, 323, 218, 201, 365]
B. Split the data into Features(X) & labels(Y). Unify shape of all the images. [3 Marks]
ALPHA = 1
IMAGE_SIZE = 224
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
# Create features and labels
from tensorflow.keras.applications.mobilenet import preprocess_input

# X: MobileNet-preprocessed RGB images, all resized to 224x224.
# y: one binary mask per image — 1 inside every annotated face box, 0 elsewhere.
y = np.zeros((int(image_array.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH))
X = np.zeros((int(image_array.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH, 3))
for index in range(image_array.shape[0]):
    img = image_array[index][0]
    # cv2.resize takes dsize as (width, height); both are 224 here so the
    # argument order cannot cause a bug.
    img = cv2.resize(img, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_CUBIC)
    try:
        img = img[:, :, :3]  # keep only RGB, dropping any alpha channel
    except IndexError:
        # A 2-D (grayscale) frame has no channel axis, so the slice above
        # raises IndexError. The row is skipped and its X/y entries stay
        # all-zero — same outcome as before, but the handler is now narrow
        # instead of a bare `except:` that also hid unrelated errors.
        continue
    X[index] = preprocess_input(np.array(img, dtype=np.float32))
    # Annotation coordinates are normalised to [0, 1]; scale them to the
    # resized image and paint each face rectangle into the mask.
    for i in image_array[index][1]:
        x1 = int(i['points'][0]['x'] * IMAGE_WIDTH)
        x2 = int(i['points'][1]['x'] * IMAGE_WIDTH)
        y1 = int(i['points'][0]['y'] * IMAGE_HEIGHT)
        y2 = int(i['points'][1]['y'] * IMAGE_HEIGHT)
        y[index][y1:y2, x1:x2] = 1
X.shape
(409, 224, 224, 3)
y.shape
(409, 224, 224)
plt.imshow(X[239])
plt.grid(False)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
plt.imshow(y[239])
plt.grid(False)
C. Split the data into train and test [400:9]. [1 Mark]
# shuffle=False keeps the original order, so the last 9 of 409 images form the
# test set (the required 400:9 split; test_size=0.02 of 409 rounds to 9).
# NOTE(review): with shuffle=False the random_state has no effect.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.02, random_state = 40, shuffle = False)
X_train.shape
(400, 224, 224, 3)
X_test.shape
(9, 224, 224, 3)
y_train.shape
(400, 224, 224)
y_test.shape
(9, 224, 224)
image_array.size
818
image_array.shape
(409, 2)
image_array[0]
array([array([[[42, 37, 34],
[56, 51, 48],
[71, 66, 63],
...,
[23, 33, 34],
[26, 36, 37],
[28, 38, 39]],
[[40, 35, 32],
[51, 46, 43],
[64, 59, 56],
...,
[27, 36, 35],
[24, 33, 32],
[26, 35, 34]],
[[43, 38, 35],
[51, 46, 43],
[61, 56, 53],
...,
[28, 30, 27],
[33, 35, 32],
[35, 37, 34]],
...,
[[56, 47, 40],
[57, 48, 41],
[61, 52, 45],
...,
[67, 48, 42],
[55, 35, 28],
[60, 40, 33]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[60, 40, 33],
[54, 34, 27]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[70, 50, 43],
[64, 44, 37]]], dtype=uint8),
list([{'label': ['Face'], 'notes': '', 'points': [{'x': 0.08615384615384615, 'y': 0.3063063063063063}, {'x': 0.1723076923076923, 'y': 0.45345345345345345}], 'imageWidth': 650, 'imageHeight': 333}, {'label': ['Face'], 'notes': '', 'points': [{'x': 0.583076923076923, 'y': 0.2912912912912913}, {'x': 0.6584615384615384, 'y': 0.46846846846846846}], 'imageWidth': 650, 'imageHeight': 333}])],
dtype=object)
D. Select random image from the train data and display original image and masked image. [2 Marks]
X_train.shape[0]
400
# BUG FIX: random.randint is inclusive on BOTH ends, so the original
# randint(0, X_train.shape[0]) could return 400 and raise IndexError when
# indexing X_train. randrange excludes the upper bound: valid indices 0..399.
n = random.randrange(X_train.shape[0])
n
86
plt.imshow(X_train[n])
plt.grid(False)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
plt.imshow(y_train[n])
plt.grid(False)
2.Model building [11 Marks]
Hint: 1. Use MobileNet architecture for initial pre-trained non-trainable layers.
Hint: 2. Add appropriate Upsampling layers to imitate U-net architecture.
A.Design a face mask detection model. [4 Marks]
ALPHA = 1
IMAGE_SIZE = 224
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
from tensorflow.keras.layers import Concatenate, UpSampling2D, Conv2D, Reshape, Activation, BatchNormalization, SpatialDropout2D
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.models import Model
def conv_block_simple(prevlayer, filters, prefix, strides=(1, 1)):
    """Conv(3x3, same padding) -> BatchNorm -> ReLU.

    Layer names are derived from `prefix` so repeated blocks stay unique
    in the model graph.
    """
    out = Conv2D(
        filters,
        (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        strides=strides,
        name=prefix + '_conv',
    )(prevlayer)
    out = BatchNormalization(name=prefix + 'BatchNormalization')(out)
    return Activation('relu', name=prefix + 'ActivationLayer')(out)
The overall MobileNet architecture used as the encoder consists of roughly 30 layers (alternating depthwise and pointwise convolutions, each followed by batch normalisation and ReLU), as shown in the model summary below.
def create_model(trainable = True):
    """Build a U-Net-style face-mask segmentation model on a MobileNet encoder.

    An ImageNet-pretrained MobileNet (no classification head) is the encoder;
    five skip connections from progressively shallower encoder layers are
    concatenated with upsampled decoder features, ending in a per-pixel
    sigmoid mask reshaped to (IMAGE_SIZE, IMAGE_SIZE).

    Parameters:
        trainable: if True, the encoder weights are fine-tuned along with
            the decoder; if False, the MobileNet layers are frozen.
    """
    model = MobileNet(input_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 3), include_top = False, alpha = ALPHA, weights = 'imagenet')
    # Freeze or unfreeze every encoder layer according to `trainable`.
    for layer in model.layers:
        layer.trainable = trainable
    # Skip-connection taps, deepest (7x7 feature map) to shallowest (112x112).
    block1 = model.get_layer('conv_pw_13_relu').output
    block2 = model.get_layer('conv_pw_11_relu').output
    block3 = model.get_layer('conv_pw_5_relu').output
    block4 = model.get_layer('conv_pw_3_relu').output
    block5 = model.get_layer('conv_pw_1_relu').output
    # Decoder: each stage doubles spatial resolution, concatenates the
    # matching encoder tap, then refines with two conv blocks.
    up1 = Concatenate()([UpSampling2D()(block1), block2])       # 7 -> 14
    conv6 = conv_block_simple(up1, 256, 'Conv_6_1')
    conv6 = conv_block_simple(conv6, 256, 'Conv_6_2')
    up2 = Concatenate()([UpSampling2D()(conv6), block3])        # 14 -> 28
    conv7 = conv_block_simple(up2, 256, 'Conv_7_1')
    conv7 = conv_block_simple(conv7, 256, 'Conv_7_2')
    up3 = Concatenate()([UpSampling2D()(conv7), block4])        # 28 -> 56
    conv8 = conv_block_simple(up3, 192, 'Conv_8_1')
    conv8 = conv_block_simple(conv8, 128, 'Conv_8_2')
    up4 = Concatenate()([UpSampling2D()(conv8), block5])        # 56 -> 112
    conv9 = conv_block_simple(up4, 96, 'Conv_9_1')
    conv9 = conv_block_simple(conv9, 64, 'Conv_9_2')
    # Final stage concatenates the raw input image as the shallowest skip.
    up5 = Concatenate()([UpSampling2D()(conv9), model.input])   # 112 -> 224
    conv10 = conv_block_simple(up5, 48, 'Conv_10_1')
    conv10 = conv_block_simple(conv10, 32, 'Conv_10_2')
    # SpatialDropout2D drops whole feature maps for regularisation.
    conv10 = SpatialDropout2D(0.2)(conv10)
    # 1x1 conv + sigmoid gives a per-pixel face probability; Reshape drops
    # the trailing channel axis to match the (H, W) label masks.
    x = Conv2D(1, (1, 1), activation = 'sigmoid')(conv10)
    x = Reshape((IMAGE_SIZE, IMAGE_SIZE))(x)
    return Model(inputs = model.input, outputs = x)
model = create_model(True)
model.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 224, 224, 3 0 []
)]
conv1 (Conv2D) (None, 112, 112, 32 864 ['input_1[0][0]']
)
conv1_bn (BatchNormalization) (None, 112, 112, 32 128 ['conv1[0][0]']
)
conv1_relu (ReLU) (None, 112, 112, 32 0 ['conv1_bn[0][0]']
)
conv_dw_1 (DepthwiseConv2D) (None, 112, 112, 32 288 ['conv1_relu[0][0]']
)
conv_dw_1_bn (BatchNormalizati (None, 112, 112, 32 128 ['conv_dw_1[0][0]']
on) )
conv_dw_1_relu (ReLU) (None, 112, 112, 32 0 ['conv_dw_1_bn[0][0]']
)
conv_pw_1 (Conv2D) (None, 112, 112, 64 2048 ['conv_dw_1_relu[0][0]']
)
conv_pw_1_bn (BatchNormalizati (None, 112, 112, 64 256 ['conv_pw_1[0][0]']
on) )
conv_pw_1_relu (ReLU) (None, 112, 112, 64 0 ['conv_pw_1_bn[0][0]']
)
conv_pad_2 (ZeroPadding2D) (None, 113, 113, 64 0 ['conv_pw_1_relu[0][0]']
)
conv_dw_2 (DepthwiseConv2D) (None, 56, 56, 64) 576 ['conv_pad_2[0][0]']
conv_dw_2_bn (BatchNormalizati (None, 56, 56, 64) 256 ['conv_dw_2[0][0]']
on)
conv_dw_2_relu (ReLU) (None, 56, 56, 64) 0 ['conv_dw_2_bn[0][0]']
conv_pw_2 (Conv2D) (None, 56, 56, 128) 8192 ['conv_dw_2_relu[0][0]']
conv_pw_2_bn (BatchNormalizati (None, 56, 56, 128) 512 ['conv_pw_2[0][0]']
on)
conv_pw_2_relu (ReLU) (None, 56, 56, 128) 0 ['conv_pw_2_bn[0][0]']
conv_dw_3 (DepthwiseConv2D) (None, 56, 56, 128) 1152 ['conv_pw_2_relu[0][0]']
conv_dw_3_bn (BatchNormalizati (None, 56, 56, 128) 512 ['conv_dw_3[0][0]']
on)
conv_dw_3_relu (ReLU) (None, 56, 56, 128) 0 ['conv_dw_3_bn[0][0]']
conv_pw_3 (Conv2D) (None, 56, 56, 128) 16384 ['conv_dw_3_relu[0][0]']
conv_pw_3_bn (BatchNormalizati (None, 56, 56, 128) 512 ['conv_pw_3[0][0]']
on)
conv_pw_3_relu (ReLU) (None, 56, 56, 128) 0 ['conv_pw_3_bn[0][0]']
conv_pad_4 (ZeroPadding2D) (None, 57, 57, 128) 0 ['conv_pw_3_relu[0][0]']
conv_dw_4 (DepthwiseConv2D) (None, 28, 28, 128) 1152 ['conv_pad_4[0][0]']
conv_dw_4_bn (BatchNormalizati (None, 28, 28, 128) 512 ['conv_dw_4[0][0]']
on)
conv_dw_4_relu (ReLU) (None, 28, 28, 128) 0 ['conv_dw_4_bn[0][0]']
conv_pw_4 (Conv2D) (None, 28, 28, 256) 32768 ['conv_dw_4_relu[0][0]']
conv_pw_4_bn (BatchNormalizati (None, 28, 28, 256) 1024 ['conv_pw_4[0][0]']
on)
conv_pw_4_relu (ReLU) (None, 28, 28, 256) 0 ['conv_pw_4_bn[0][0]']
conv_dw_5 (DepthwiseConv2D) (None, 28, 28, 256) 2304 ['conv_pw_4_relu[0][0]']
conv_dw_5_bn (BatchNormalizati (None, 28, 28, 256) 1024 ['conv_dw_5[0][0]']
on)
conv_dw_5_relu (ReLU) (None, 28, 28, 256) 0 ['conv_dw_5_bn[0][0]']
conv_pw_5 (Conv2D) (None, 28, 28, 256) 65536 ['conv_dw_5_relu[0][0]']
conv_pw_5_bn (BatchNormalizati (None, 28, 28, 256) 1024 ['conv_pw_5[0][0]']
on)
conv_pw_5_relu (ReLU) (None, 28, 28, 256) 0 ['conv_pw_5_bn[0][0]']
conv_pad_6 (ZeroPadding2D) (None, 29, 29, 256) 0 ['conv_pw_5_relu[0][0]']
conv_dw_6 (DepthwiseConv2D) (None, 14, 14, 256) 2304 ['conv_pad_6[0][0]']
conv_dw_6_bn (BatchNormalizati (None, 14, 14, 256) 1024 ['conv_dw_6[0][0]']
on)
conv_dw_6_relu (ReLU) (None, 14, 14, 256) 0 ['conv_dw_6_bn[0][0]']
conv_pw_6 (Conv2D) (None, 14, 14, 512) 131072 ['conv_dw_6_relu[0][0]']
conv_pw_6_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_pw_6[0][0]']
on)
conv_pw_6_relu (ReLU) (None, 14, 14, 512) 0 ['conv_pw_6_bn[0][0]']
conv_dw_7 (DepthwiseConv2D) (None, 14, 14, 512) 4608 ['conv_pw_6_relu[0][0]']
conv_dw_7_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_dw_7[0][0]']
on)
conv_dw_7_relu (ReLU) (None, 14, 14, 512) 0 ['conv_dw_7_bn[0][0]']
conv_pw_7 (Conv2D) (None, 14, 14, 512) 262144 ['conv_dw_7_relu[0][0]']
conv_pw_7_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_pw_7[0][0]']
on)
conv_pw_7_relu (ReLU) (None, 14, 14, 512) 0 ['conv_pw_7_bn[0][0]']
conv_dw_8 (DepthwiseConv2D) (None, 14, 14, 512) 4608 ['conv_pw_7_relu[0][0]']
conv_dw_8_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_dw_8[0][0]']
on)
conv_dw_8_relu (ReLU) (None, 14, 14, 512) 0 ['conv_dw_8_bn[0][0]']
conv_pw_8 (Conv2D) (None, 14, 14, 512) 262144 ['conv_dw_8_relu[0][0]']
conv_pw_8_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_pw_8[0][0]']
on)
conv_pw_8_relu (ReLU) (None, 14, 14, 512) 0 ['conv_pw_8_bn[0][0]']
conv_dw_9 (DepthwiseConv2D) (None, 14, 14, 512) 4608 ['conv_pw_8_relu[0][0]']
conv_dw_9_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_dw_9[0][0]']
on)
conv_dw_9_relu (ReLU) (None, 14, 14, 512) 0 ['conv_dw_9_bn[0][0]']
conv_pw_9 (Conv2D) (None, 14, 14, 512) 262144 ['conv_dw_9_relu[0][0]']
conv_pw_9_bn (BatchNormalizati (None, 14, 14, 512) 2048 ['conv_pw_9[0][0]']
on)
conv_pw_9_relu (ReLU) (None, 14, 14, 512) 0 ['conv_pw_9_bn[0][0]']
conv_dw_10 (DepthwiseConv2D) (None, 14, 14, 512) 4608 ['conv_pw_9_relu[0][0]']
conv_dw_10_bn (BatchNormalizat (None, 14, 14, 512) 2048 ['conv_dw_10[0][0]']
ion)
conv_dw_10_relu (ReLU) (None, 14, 14, 512) 0 ['conv_dw_10_bn[0][0]']
conv_pw_10 (Conv2D) (None, 14, 14, 512) 262144 ['conv_dw_10_relu[0][0]']
conv_pw_10_bn (BatchNormalizat (None, 14, 14, 512) 2048 ['conv_pw_10[0][0]']
ion)
conv_pw_10_relu (ReLU) (None, 14, 14, 512) 0 ['conv_pw_10_bn[0][0]']
conv_dw_11 (DepthwiseConv2D) (None, 14, 14, 512) 4608 ['conv_pw_10_relu[0][0]']
conv_dw_11_bn (BatchNormalizat (None, 14, 14, 512) 2048 ['conv_dw_11[0][0]']
ion)
conv_dw_11_relu (ReLU) (None, 14, 14, 512) 0 ['conv_dw_11_bn[0][0]']
conv_pw_11 (Conv2D) (None, 14, 14, 512) 262144 ['conv_dw_11_relu[0][0]']
conv_pw_11_bn (BatchNormalizat (None, 14, 14, 512) 2048 ['conv_pw_11[0][0]']
ion)
conv_pw_11_relu (ReLU) (None, 14, 14, 512) 0 ['conv_pw_11_bn[0][0]']
conv_pad_12 (ZeroPadding2D) (None, 15, 15, 512) 0 ['conv_pw_11_relu[0][0]']
conv_dw_12 (DepthwiseConv2D) (None, 7, 7, 512) 4608 ['conv_pad_12[0][0]']
conv_dw_12_bn (BatchNormalizat (None, 7, 7, 512) 2048 ['conv_dw_12[0][0]']
ion)
conv_dw_12_relu (ReLU) (None, 7, 7, 512) 0 ['conv_dw_12_bn[0][0]']
conv_pw_12 (Conv2D) (None, 7, 7, 1024) 524288 ['conv_dw_12_relu[0][0]']
conv_pw_12_bn (BatchNormalizat (None, 7, 7, 1024) 4096 ['conv_pw_12[0][0]']
ion)
conv_pw_12_relu (ReLU) (None, 7, 7, 1024) 0 ['conv_pw_12_bn[0][0]']
conv_dw_13 (DepthwiseConv2D) (None, 7, 7, 1024) 9216 ['conv_pw_12_relu[0][0]']
conv_dw_13_bn (BatchNormalizat (None, 7, 7, 1024) 4096 ['conv_dw_13[0][0]']
ion)
conv_dw_13_relu (ReLU) (None, 7, 7, 1024) 0 ['conv_dw_13_bn[0][0]']
conv_pw_13 (Conv2D) (None, 7, 7, 1024) 1048576 ['conv_dw_13_relu[0][0]']
conv_pw_13_bn (BatchNormalizat (None, 7, 7, 1024) 4096 ['conv_pw_13[0][0]']
ion)
conv_pw_13_relu (ReLU) (None, 7, 7, 1024) 0 ['conv_pw_13_bn[0][0]']
up_sampling2d (UpSampling2D) (None, 14, 14, 1024 0 ['conv_pw_13_relu[0][0]']
)
concatenate (Concatenate) (None, 14, 14, 1536 0 ['up_sampling2d[0][0]',
) 'conv_pw_11_relu[0][0]']
Conv_6_1_conv (Conv2D) (None, 14, 14, 256) 3539200 ['concatenate[0][0]']
Conv_6_1BatchNormalization (Ba (None, 14, 14, 256) 1024 ['Conv_6_1_conv[0][0]']
tchNormalization)
Conv_6_1ActivationLayer (Activ (None, 14, 14, 256) 0 ['Conv_6_1BatchNormalization[0][0
ation) ]']
Conv_6_2_conv (Conv2D) (None, 14, 14, 256) 590080 ['Conv_6_1ActivationLayer[0][0]']
Conv_6_2BatchNormalization (Ba (None, 14, 14, 256) 1024 ['Conv_6_2_conv[0][0]']
tchNormalization)
Conv_6_2ActivationLayer (Activ (None, 14, 14, 256) 0 ['Conv_6_2BatchNormalization[0][0
ation) ]']
up_sampling2d_1 (UpSampling2D) (None, 28, 28, 256) 0 ['Conv_6_2ActivationLayer[0][0]']
concatenate_1 (Concatenate) (None, 28, 28, 512) 0 ['up_sampling2d_1[0][0]',
'conv_pw_5_relu[0][0]']
Conv_7_1_conv (Conv2D) (None, 28, 28, 256) 1179904 ['concatenate_1[0][0]']
Conv_7_1BatchNormalization (Ba (None, 28, 28, 256) 1024 ['Conv_7_1_conv[0][0]']
tchNormalization)
Conv_7_1ActivationLayer (Activ (None, 28, 28, 256) 0 ['Conv_7_1BatchNormalization[0][0
ation) ]']
Conv_7_2_conv (Conv2D) (None, 28, 28, 256) 590080 ['Conv_7_1ActivationLayer[0][0]']
Conv_7_2BatchNormalization (Ba (None, 28, 28, 256) 1024 ['Conv_7_2_conv[0][0]']
tchNormalization)
Conv_7_2ActivationLayer (Activ (None, 28, 28, 256) 0 ['Conv_7_2BatchNormalization[0][0
ation) ]']
up_sampling2d_2 (UpSampling2D) (None, 56, 56, 256) 0 ['Conv_7_2ActivationLayer[0][0]']
concatenate_2 (Concatenate) (None, 56, 56, 384) 0 ['up_sampling2d_2[0][0]',
'conv_pw_3_relu[0][0]']
Conv_8_1_conv (Conv2D) (None, 56, 56, 192) 663744 ['concatenate_2[0][0]']
Conv_8_1BatchNormalization (Ba (None, 56, 56, 192) 768 ['Conv_8_1_conv[0][0]']
tchNormalization)
Conv_8_1ActivationLayer (Activ (None, 56, 56, 192) 0 ['Conv_8_1BatchNormalization[0][0
ation) ]']
Conv_8_2_conv (Conv2D) (None, 56, 56, 128) 221312 ['Conv_8_1ActivationLayer[0][0]']
Conv_8_2BatchNormalization (Ba (None, 56, 56, 128) 512 ['Conv_8_2_conv[0][0]']
tchNormalization)
Conv_8_2ActivationLayer (Activ (None, 56, 56, 128) 0 ['Conv_8_2BatchNormalization[0][0
ation) ]']
up_sampling2d_3 (UpSampling2D) (None, 112, 112, 12 0 ['Conv_8_2ActivationLayer[0][0]']
8)
concatenate_3 (Concatenate) (None, 112, 112, 19 0 ['up_sampling2d_3[0][0]',
2) 'conv_pw_1_relu[0][0]']
Conv_9_1_conv (Conv2D) (None, 112, 112, 96 165984 ['concatenate_3[0][0]']
)
Conv_9_1BatchNormalization (Ba (None, 112, 112, 96 384 ['Conv_9_1_conv[0][0]']
tchNormalization) )
Conv_9_1ActivationLayer (Activ (None, 112, 112, 96 0 ['Conv_9_1BatchNormalization[0][0
ation) ) ]']
Conv_9_2_conv (Conv2D) (None, 112, 112, 64 55360 ['Conv_9_1ActivationLayer[0][0]']
)
Conv_9_2BatchNormalization (Ba (None, 112, 112, 64 256 ['Conv_9_2_conv[0][0]']
tchNormalization) )
Conv_9_2ActivationLayer (Activ (None, 112, 112, 64 0 ['Conv_9_2BatchNormalization[0][0
ation) ) ]']
up_sampling2d_4 (UpSampling2D) (None, 224, 224, 64 0 ['Conv_9_2ActivationLayer[0][0]']
)
concatenate_4 (Concatenate) (None, 224, 224, 67 0 ['up_sampling2d_4[0][0]',
) 'input_1[0][0]']
Conv_10_1_conv (Conv2D) (None, 224, 224, 48 28992 ['concatenate_4[0][0]']
)
Conv_10_1BatchNormalization (B (None, 224, 224, 48 192 ['Conv_10_1_conv[0][0]']
atchNormalization) )
Conv_10_1ActivationLayer (Acti (None, 224, 224, 48 0 ['Conv_10_1BatchNormalization[0][
vation) ) 0]']
Conv_10_2_conv (Conv2D) (None, 224, 224, 32 13856 ['Conv_10_1ActivationLayer[0][0]'
) ]
Conv_10_2BatchNormalization (B (None, 224, 224, 32 128 ['Conv_10_2_conv[0][0]']
atchNormalization) )
Conv_10_2ActivationLayer (Acti (None, 224, 224, 32 0 ['Conv_10_2BatchNormalization[0][
vation) ) 0]']
spatial_dropout2d (SpatialDrop (None, 224, 224, 32 0 ['Conv_10_2ActivationLayer[0][0]'
out2D) ) ]
conv2d (Conv2D) (None, 224, 224, 1) 33 ['spatial_dropout2d[0][0]']
reshape (Reshape) (None, 224, 224) 0 ['conv2d[0][0]']
==================================================================================================
Total params: 10,283,745
Trainable params: 10,258,689
Non-trainable params: 25,056
__________________________________________________________________________________________________
B. Design your own Dice Coefficient and Loss function. [2 Marks]
def dice_coefficient(y_true, y_pred):
    """Soft Dice overlap between ground-truth and predicted masks.

    Computes 2*|A.B| / (|A| + |B|); the epsilon in the denominator guards
    against division by zero when both masks are entirely empty.
    """
    intersection = tf.reduce_sum(y_true * y_pred)
    total = tf.reduce_sum(y_true + y_pred)
    return (2 * intersection) / (total + tf.keras.backend.epsilon())
def loss(y_true, y_pred):
    """Combined loss: pixel-wise binary cross-entropy minus log(Dice).

    Minimising -log(dice) directly rewards mask overlap, while BCE keeps
    stable per-pixel gradients; the epsilon avoids log(0) for empty overlap.
    NOTE(review): `binary_crossentropy` is imported further down the script;
    this works because the name is only resolved when the loss is first
    called during compile/fit, but the import should precede this definition.
    """
    return binary_crossentropy(y_true, y_pred) - tf.keras.backend.log(dice_coefficient(y_true, y_pred) + tf.keras.backend.epsilon())
C. Train and tune the model as required. [3 Marks]
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy
# Adam with a small learning rate, suitable for fine-tuning a pretrained
# encoder. NOTE(review): epsilon=None relies on Keras substituting its
# default; newer TF releases reject None here — confirm the TF version.
optimizer = Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
# Track the custom Dice coefficient as the training metric.
model.compile(loss=loss, optimizer=optimizer, metrics=[dice_coefficient])
### Define Call backs
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
# Save only the best weights (by validation loss) to a loss-stamped file.
checkpoint = ModelCheckpoint("model-{val_loss:.2f}.h5", monitor="val_loss", verbose=1, save_best_only=True, save_weights_only=True)
# Stop after 5 epochs without val_loss improvement.
stop = EarlyStopping(monitor="val_loss", patience=5)
# NOTE(review): same patience as EarlyStopping, so training will usually
# stop before the LR reduction ever fires — consider a smaller patience here.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=5, min_lr=1e-6, verbose=1)
# NOTE(review): redundant re-import; tensorflow is already imported above.
import tensorflow as tensorflow
# batch_size=1 keeps GPU memory low for full 224x224 masks; up to 30 epochs,
# with the 9-image hold-out used as validation data.
model.fit(X_train, y_train, epochs = 30, batch_size = 1, callbacks = [checkpoint, reduce_lr, stop], validation_data = (X_test, y_test))
Epoch 1/30 400/400 [==============================] - ETA: 0s - loss: 1.3664 - dice_coefficient: 0.4233 Epoch 1: val_loss improved from inf to 1.27807, saving model to model-1.28.h5 400/400 [==============================] - 415s 1s/step - loss: 1.3664 - dice_coefficient: 0.4233 - val_loss: 1.2781 - val_dice_coefficient: 0.4265 - lr: 1.0000e-04 Epoch 2/30 400/400 [==============================] - ETA: 0s - loss: 0.9554 - dice_coefficient: 0.5380 Epoch 2: val_loss improved from 1.27807 to 1.06879, saving model to model-1.07.h5 400/400 [==============================] - 429s 1s/step - loss: 0.9554 - dice_coefficient: 0.5380 - val_loss: 1.0688 - val_dice_coefficient: 0.4832 - lr: 1.0000e-04 Epoch 3/30 400/400 [==============================] - ETA: 0s - loss: 0.7572 - dice_coefficient: 0.6121 Epoch 3: val_loss did not improve from 1.06879 400/400 [==============================] - 508s 1s/step - loss: 0.7572 - dice_coefficient: 0.6121 - val_loss: 1.3607 - val_dice_coefficient: 0.4223 - lr: 1.0000e-04 Epoch 4/30 400/400 [==============================] - ETA: 0s - loss: 0.6008 - dice_coefficient: 0.6780 Epoch 4: val_loss did not improve from 1.06879 400/400 [==============================] - 357s 894ms/step - loss: 0.6008 - dice_coefficient: 0.6780 - val_loss: 1.1771 - val_dice_coefficient: 0.4704 - lr: 1.0000e-04 Epoch 5/30 400/400 [==============================] - ETA: 0s - loss: 0.4811 - dice_coefficient: 0.7326 Epoch 5: val_loss improved from 1.06879 to 1.06484, saving model to model-1.06.h5 400/400 [==============================] - 349s 873ms/step - loss: 0.4811 - dice_coefficient: 0.7326 - val_loss: 1.0648 - val_dice_coefficient: 0.5249 - lr: 1.0000e-04 Epoch 6/30 400/400 [==============================] - ETA: 0s - loss: 0.4263 - dice_coefficient: 0.7631 Epoch 6: val_loss improved from 1.06484 to 1.04839, saving model to model-1.05.h5 400/400 [==============================] - 348s 870ms/step - loss: 0.4263 - dice_coefficient: 0.7631 - val_loss: 1.0484 - 
val_dice_coefficient: 0.5479 - lr: 1.0000e-04 Epoch 7/30 400/400 [==============================] - ETA: 0s - loss: 0.3557 - dice_coefficient: 0.8012 Epoch 7: val_loss did not improve from 1.04839 400/400 [==============================] - 348s 869ms/step - loss: 0.3557 - dice_coefficient: 0.8012 - val_loss: 1.1341 - val_dice_coefficient: 0.5636 - lr: 1.0000e-04 Epoch 8/30 400/400 [==============================] - ETA: 0s - loss: 0.3023 - dice_coefficient: 0.8297 Epoch 8: val_loss did not improve from 1.04839 400/400 [==============================] - 359s 897ms/step - loss: 0.3023 - dice_coefficient: 0.8297 - val_loss: 1.2142 - val_dice_coefficient: 0.4870 - lr: 1.0000e-04 Epoch 9/30 400/400 [==============================] - ETA: 0s - loss: 0.2705 - dice_coefficient: 0.8475 Epoch 9: val_loss did not improve from 1.04839 400/400 [==============================] - 360s 900ms/step - loss: 0.2705 - dice_coefficient: 0.8475 - val_loss: 1.2626 - val_dice_coefficient: 0.5391 - lr: 1.0000e-04 Epoch 10/30 400/400 [==============================] - ETA: 0s - loss: 0.2445 - dice_coefficient: 0.8646 Epoch 10: val_loss improved from 1.04839 to 1.00107, saving model to model-1.00.h5 400/400 [==============================] - 351s 879ms/step - loss: 0.2445 - dice_coefficient: 0.8646 - val_loss: 1.0011 - val_dice_coefficient: 0.5750 - lr: 1.0000e-04 Epoch 11/30 400/400 [==============================] - ETA: 0s - loss: 0.2176 - dice_coefficient: 0.8811 Epoch 11: val_loss did not improve from 1.00107 400/400 [==============================] - 347s 868ms/step - loss: 0.2176 - dice_coefficient: 0.8811 - val_loss: 1.1781 - val_dice_coefficient: 0.5114 - lr: 1.0000e-04 Epoch 12/30 400/400 [==============================] - ETA: 0s - loss: 0.2002 - dice_coefficient: 0.8920 Epoch 12: val_loss did not improve from 1.00107 400/400 [==============================] - 357s 892ms/step - loss: 0.2002 - dice_coefficient: 0.8920 - val_loss: 1.2435 - val_dice_coefficient: 0.5514 - lr: 
1.0000e-04 Epoch 13/30 400/400 [==============================] - ETA: 0s - loss: 0.1862 - dice_coefficient: 0.9013 Epoch 13: val_loss did not improve from 1.00107 400/400 [==============================] - 347s 869ms/step - loss: 0.1862 - dice_coefficient: 0.9013 - val_loss: 1.1629 - val_dice_coefficient: 0.5686 - lr: 1.0000e-04 Epoch 14/30 400/400 [==============================] - ETA: 0s - loss: 0.1728 - dice_coefficient: 0.9096 Epoch 14: val_loss improved from 1.00107 to 0.92527, saving model to model-0.93.h5 400/400 [==============================] - 378s 944ms/step - loss: 0.1728 - dice_coefficient: 0.9096 - val_loss: 0.9253 - val_dice_coefficient: 0.5705 - lr: 1.0000e-04 Epoch 15/30 400/400 [==============================] - ETA: 0s - loss: 0.1590 - dice_coefficient: 0.9184 Epoch 15: val_loss did not improve from 0.92527 400/400 [==============================] - 366s 915ms/step - loss: 0.1590 - dice_coefficient: 0.9184 - val_loss: 1.4799 - val_dice_coefficient: 0.5437 - lr: 1.0000e-04 Epoch 16/30 400/400 [==============================] - ETA: 0s - loss: 0.1535 - dice_coefficient: 0.9221 Epoch 16: val_loss did not improve from 0.92527 400/400 [==============================] - 351s 878ms/step - loss: 0.1535 - dice_coefficient: 0.9221 - val_loss: 1.0739 - val_dice_coefficient: 0.5876 - lr: 1.0000e-04 Epoch 17/30 400/400 [==============================] - ETA: 0s - loss: 0.1587 - dice_coefficient: 0.9206 Epoch 17: val_loss improved from 0.92527 to 0.82464, saving model to model-0.82.h5 400/400 [==============================] - 349s 873ms/step - loss: 0.1587 - dice_coefficient: 0.9206 - val_loss: 0.8246 - val_dice_coefficient: 0.6148 - lr: 1.0000e-04 Epoch 18/30 400/400 [==============================] - ETA: 0s - loss: 0.1552 - dice_coefficient: 0.9230 Epoch 18: val_loss did not improve from 0.82464 400/400 [==============================] - 350s 875ms/step - loss: 0.1552 - dice_coefficient: 0.9230 - val_loss: 1.3627 - val_dice_coefficient: 0.5465 - lr: 
1.0000e-04 Epoch 19/30 400/400 [==============================] - ETA: 0s - loss: 0.1479 - dice_coefficient: 0.9279 Epoch 19: val_loss did not improve from 0.82464 400/400 [==============================] - 386s 966ms/step - loss: 0.1479 - dice_coefficient: 0.9279 - val_loss: 1.5336 - val_dice_coefficient: 0.5133 - lr: 1.0000e-04 Epoch 20/30 400/400 [==============================] - ETA: 0s - loss: 0.1375 - dice_coefficient: 0.9342 Epoch 20: val_loss did not improve from 0.82464 400/400 [==============================] - 347s 868ms/step - loss: 0.1375 - dice_coefficient: 0.9342 - val_loss: 1.5507 - val_dice_coefficient: 0.5315 - lr: 1.0000e-04 Epoch 21/30 400/400 [==============================] - ETA: 0s - loss: 0.1303 - dice_coefficient: 0.9379 Epoch 21: val_loss did not improve from 0.82464 400/400 [==============================] - 351s 878ms/step - loss: 0.1303 - dice_coefficient: 0.9379 - val_loss: 1.6304 - val_dice_coefficient: 0.5087 - lr: 1.0000e-04 Epoch 22/30 400/400 [==============================] - ETA: 0s - loss: 0.1200 - dice_coefficient: 0.9442 Epoch 22: val_loss did not improve from 0.82464 Epoch 22: ReduceLROnPlateau reducing learning rate to 1.9999999494757503e-05. 400/400 [==============================] - 610s 2s/step - loss: 0.1200 - dice_coefficient: 0.9442 - val_loss: 1.6013 - val_dice_coefficient: 0.5077 - lr: 1.0000e-04
<keras.callbacks.History at 0x1fcc1fdce50>
D. Evaluate and share insights on performance of the model. [2 Marks]
model.evaluate(X_test, y_test, verbose = 1)
1/1 [==============================] - 1s 1s/step - loss: 1.0335 - dice_coefficient: 0.5205
[1.0334504842758179, 0.5204586386680603]
history=model.fit(X_train, y_train, epochs = 30, batch_size = 1, callbacks = [checkpoint, reduce_lr, stop], validation_data = (X_test, y_test))
Epoch 1/30 400/400 [==============================] - ETA: 0s - loss: 0.1118 - dice_coefficient: 0.9494 Epoch 1: val_loss did not improve from 0.82464 400/400 [==============================] - 358s 894ms/step - loss: 0.1118 - dice_coefficient: 0.9494 - val_loss: 1.6515 - val_dice_coefficient: 0.4938 - lr: 2.0000e-05 Epoch 2/30 400/400 [==============================] - ETA: 0s - loss: 0.0866 - dice_coefficient: 0.9652 Epoch 2: val_loss did not improve from 0.82464 400/400 [==============================] - 403s 1s/step - loss: 0.0866 - dice_coefficient: 0.9652 - val_loss: 1.5656 - val_dice_coefficient: 0.5067 - lr: 2.0000e-05 Epoch 3/30 400/400 [==============================] - ETA: 0s - loss: 0.0727 - dice_coefficient: 0.9743 Epoch 3: val_loss did not improve from 0.82464 400/400 [==============================] - 410s 1s/step - loss: 0.0727 - dice_coefficient: 0.9743 - val_loss: 1.4020 - val_dice_coefficient: 0.5432 - lr: 2.0000e-05 Epoch 4/30 400/400 [==============================] - ETA: 0s - loss: 0.0675 - dice_coefficient: 0.9778 Epoch 4: val_loss did not improve from 0.82464 400/400 [==============================] - 397s 993ms/step - loss: 0.0675 - dice_coefficient: 0.9778 - val_loss: 1.5968 - val_dice_coefficient: 0.5134 - lr: 2.0000e-05 Epoch 5/30 400/400 [==============================] - ETA: 0s - loss: 0.0661 - dice_coefficient: 0.9791 Epoch 5: val_loss did not improve from 0.82464 400/400 [==============================] - 387s 967ms/step - loss: 0.0661 - dice_coefficient: 0.9791 - val_loss: 1.7373 - val_dice_coefficient: 0.5037 - lr: 2.0000e-05 Epoch 6/30 400/400 [==============================] - ETA: 0s - loss: 0.0663 - dice_coefficient: 0.9795 Epoch 6: val_loss did not improve from 0.82464 400/400 [==============================] - 388s 970ms/step - loss: 0.0663 - dice_coefficient: 0.9795 - val_loss: 1.5936 - val_dice_coefficient: 0.5407 - lr: 2.0000e-05 Epoch 7/30 400/400 [==============================] - ETA: 0s - loss: 0.0666 - 
dice_coefficient: 0.9794 Epoch 7: val_loss did not improve from 0.82464 400/400 [==============================] - 375s 937ms/step - loss: 0.0666 - dice_coefficient: 0.9794 - val_loss: 1.4293 - val_dice_coefficient: 0.5527 - lr: 2.0000e-05 Epoch 8/30 400/400 [==============================] - ETA: 0s - loss: 0.0694 - dice_coefficient: 0.9773 Epoch 8: val_loss did not improve from 0.82464 Epoch 8: ReduceLROnPlateau reducing learning rate to 3.999999898951501e-06. 400/400 [==============================] - 378s 945ms/step - loss: 0.0694 - dice_coefficient: 0.9773 - val_loss: 1.4690 - val_dice_coefficient: 0.5454 - lr: 2.0000e-05
model.evaluate(X_test, y_test, verbose = 1)
1/1 [==============================] - 4s 4s/step - loss: 0.9426 - dice_coefficient: 0.5766
[0.9425766468048096, 0.576626718044281]
Dice co-efficient
Dice coefficient is defined as follows:
Dice coefficient = 2 * |X ∩ Y| / (|X| + |Y|)
X is the predicted set of pixels and Y is the ground truth.
A higher Dice coefficient is better. A Dice coefficient of 1 is achieved when there is perfect overlap between X and Y. The metric is maximised by increasing the overlap between X and Y relative to their combined size.
history.params
{'verbose': 1, 'epochs': 30, 'steps': 400}
# Pull the final-epoch train/validation metrics from the History object.
train_dice_coefficient = history.history['dice_coefficient'][-1]
test_dice_coefficient = history.history['val_dice_coefficient'][-1]
train_loss = history.history['loss'][-1]
test_loss = history.history['val_loss'][-1]
# Fixed typo in the printed label ("Cofficient" -> "Coefficient").
print('Training dice Coefficient ', train_dice_coefficient, 'Test Dice Coefficient', test_dice_coefficient, 'train_loss', train_loss, 'test_loss', test_loss)
Training dice Cofficient 0.9772839546203613 Test Dice Coefficient 0.5454108715057373 train_loss 0.06941163539886475 test_loss 1.4690097570419312
def plot_dice_coefficient_loss(history):
    """Plot training/validation Dice coefficient and loss curves per epoch.

    Parameters
    ----------
    history : keras.callbacks.History
        History returned by model.fit(); its ``history`` dict must contain
        'dice_coefficient', 'val_dice_coefficient', 'loss' and 'val_loss'.
    """
    # Renamed locals from the misleading `accuracy` to `dice` — the metric
    # being plotted is the Dice coefficient, not classification accuracy.
    train_dice = history.history['dice_coefficient']
    val_dice = history.history['val_dice_coefficient']
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(train_dice))  # one point per completed epoch

    # Dice coefficient curves (typo fixed in the validation legend label).
    plt.plot(epochs, train_dice, label='training Dice Coefficient')
    plt.plot(epochs, val_dice, label='validation Dice Coefficient')
    plt.title('Training and validation Dice co-efficient')
    plt.ylabel('Dice Coefficient')
    plt.xlabel('Epochs')
    plt.legend(loc='lower right')

    # Loss curves on a second figure.
    plt.figure()
    plt.plot(epochs, train_loss, label='training loss')
    plt.plot(epochs, val_loss, label='validation loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper right')
    plt.title('Training and validation loss')
plot_dice_coefficient_loss(history)
Insight:
* The Dice coefficient of the model on the test data is **0.5766**, whereas on the training data it is **0.97**, which is close to the ideal value of 1. <br>
* The training-vs-validation Dice coefficient curves show clear overfitting: the model performs much better on the training data than on the test data.<br>
* In the later epochs the Dice coefficient and loss are almost flat, showing little change as training continues. <br>
3. Test the model predictions on the test image: ‘image with index 3 in the test data’ and visualise the predicted masks on the faces in the image. [2 Marks]
y_pred = model.predict(X_test, verbose = 1)
1/1 [==============================] - 3s 3s/step
X_test.shape
(9, 224, 224, 3)
# To access the image in index 3
n = 3
# Resize the test image to the model input size (IMAGE_HEIGHT/IMAGE_WIDTH
# are defined earlier in the notebook).
image = cv2.resize(X_test[n], dsize=(IMAGE_HEIGHT, IMAGE_WIDTH), interpolation=cv2.INTER_CUBIC)
# Binarise the soft prediction at 0.1, then resize the resulting 0/1 mask.
pred_mask = cv2.resize(1.0 * (y_pred[n] > 0.1), (IMAGE_WIDTH, IMAGE_HEIGHT))
# BUG FIX: the original `image2 = image` only aliased the array, so the
# per-channel writes below also clobbered `image`. Copy instead.
image2 = image.copy()
# Zero out every pixel outside the predicted face mask, channel by channel.
for channel in range(3):
    image2[:, :, channel] = pred_mask * image[:, :, channel]
out_image = image2
fig = plt.figure(figsize=(15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(out_image)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
<matplotlib.image.AxesImage at 0x1fcd89c6a30>
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(pred_mask, alpha = 1)
<matplotlib.image.AxesImage at 0x1fcd89ff7f0>
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(X_test[n])
plt.savefig('image.jpg', bbox_inches = 'tight', pad_inches = 0)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
# Save the predicted mask for image n as a standalone figure.
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(y_pred[n], alpha = 0.8)
plt.savefig('mask.jpg', bbox_inches = 'tight', pad_inches = 0)
# Re-read the saved image and mask figures and composite them with
# cv2.add (saturating pixel-wise addition) to visualise the overlay.
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
img = cv2.imread('image.jpg', 1)
mask = cv2.imread('mask.jpg', 1)
img = cv2.add(img, mask)
plt.imshow(img, alpha = 0.8)
<matplotlib.image.AxesImage at 0x1fcd91b7970>
Conclusion
The objective of the project is to build a face detection model. The given dataset contains a numpy array of images with faces together with the corresponding face masks. The model uses MobileNet as a pre-trained encoder, and appropriate upsampling layers have been added to imitate the U-Net architecture.
DOMAIN: Entertainment
CONTEXT:Company X owns a movie application and repository which caters movie streaming to millions of users who on subscription basis. Company wants to automate the process of cast and crew information in each scene from a movie such that when a user pauses on the movie and clicks on cast information button, the app will show details of the actor in the scene. Company has an in-house computer vision and multimedia experts who need to detect faces from screen shots from the movie scene.
The data labelling is already done.
• DATA DESCRIPTION:The dataset comprises of face images
• PROJECT OBJECTIVE: : To create an image dataset to be used by AI team build an image classifier data. Profile images of people are given.
1. Read/import images from folder ‘training_images’. [2 Marks]
## Extract the file from zip file
import zipfile
zip_reference_train = zipfile.ZipFile('training_images-20211126T092819Z-001.zip', 'r')
## Extract to new folder called unzipped
zip_reference_train.extractall('training_unzipped') # unzip directory
zip_reference_train.close()
main_dir = 'training_unzipped'
train_dir = main_dir +'/training_images'
print(train_dir)
training_unzipped/training_images
import os
os.getcwd()
'C:\\Users\\HP\\Python\\ComputerVision_2'
classes=os.listdir(train_dir)
print(len(classes))
1091
Observation : There are 1091 images found in the given directory
2. Write a loop which will iterate through all the images in the ‘training_images’ folder and detect the faces present on all the images. [3 Marks]
Hint: You can use ’haarcascade_frontalface_default.xml’ from internet to detect faces which is available open source.
Haar Cascade Classifiers : Haar Cascade classifier is an effective object detection approach which was proposed by Paul Viola and Michael Jones in their paper, “Rapid Object Detection using a Boosted Cascade of Simple Features” in 2001.
This is basically a machine learning based approach where a cascade function is trained from a lot of images both positive and negative. Based on the training it is then used to detect the objects in the other images.
So how this works is they are huge individual .xml files with a lot of feature sets and each xml corresponds to a very specific type of use case. [Reference link given below]
For the given problem the "haarcascade_frontalface_default.xml" is used for detecting the frontal face images.
import cv2
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
Notes: haarcascade_frontalface_default.xml file is downloaded from github and stored into local location. That file is stored to local system as face_cascade
# Reading the image
test_img = cv2.imread(train_dir+'/'+'real_00089.jpg')
plt.grid(False)
plt.imshow(test_img)
<matplotlib.image.AxesImage at 0x1fd50279a00>
faces=[]
OpenCV loads images as BGR (Blue, Green and Red channels), which is computationally more intensive to process. We convert to grayscale since it has a single channel (black and white).
# , img = cap.read()
gray = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.1, 5)
plt.grid(False)
plt.imshow(gray,cmap=plt.get_cmap('gray'))
<matplotlib.image.AxesImage at 0x1fd403d89d0>
faces
array([[102, 185, 405, 405]])
# BUG FIX: `faces is ()` compares object identity against a fresh tuple
# literal (unreliable, and a SyntaxWarning on Python 3.8+). detectMultiScale
# returns an empty tuple when nothing is found, so test for emptiness.
if len(faces) == 0:
    print("No faces found")
# Draw a bounding box on each detected face and show the result.
for (x, y, w, h) in faces:
    cv2.rectangle(test_img, (x, y), (x+w, y+h), (127, 0, 255), 2)
    cv2.imshow('Face Detection', test_img)
    cv2.waitKey(0)
cv2.destroyAllWindows()
plt.grid(False)
plt.imshow(test_img)
<matplotlib.image.AxesImage at 0x1fd40477f10>
Observations:
The above code displays the image with Rectangle detecting the face in it.
img = cv2.imread(train_dir+'/'+'real_00005.jpg')
plt.grid(False)
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1fd40b4caf0>
# Importing OpenCV package
import cv2
# Reading the image
# Converting image to grayscale
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Loading the required haar-cascade xml classifier file
haar_cascade = cv2.CascadeClassifier('Haarcascade_frontalface_default.xml')
# Applying the face detection method on the grayscale image
# (scaleFactor=1.1, minNeighbors=5; returns an array of (x, y, w, h)
# boxes, or an empty tuple when no face is found)
faces_rect = haar_cascade.detectMultiScale(gray_img, 1.1, 5)
print(faces_rect)
# Iterating through rectangles of detected faces
for (x, y, w, h) in faces_rect:
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.imshow('Detected faces', img)
    cv2.waitKey(0)
()
-1
faces=[]
# Importing OpenCV package
import cv2
# Reading the image
img = cv2.imread(train_dir+'/'+'real_00012.jpg')
# Converting image to grayscale
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Loading the required haar-cascade xml classifier file
haar_cascade = cv2.CascadeClassifier('Haarcascade_frontalface_default.xml')
# Applying the face detection method on the grayscale image
faces_rect = haar_cascade.detectMultiScale(gray_img, 1.1, 2)
print(faces_rect)
[[113 188 375 375]]
# BUG FIX: `faces_rect is ()` is an identity comparison with a tuple
# literal (unreliable; SyntaxWarning on Python 3.8+) — test length instead.
if len(faces_rect) == 0:
    print("No faces found")
# Draw a bounding box on each detected face and show the result.
for (x, y, w, h) in faces_rect:
    cv2.rectangle(img, (x, y), (x+w, y+h), (127, 0, 255), 2)
    cv2.imshow('Face Detection', img)
    cv2.waitKey(0)
cv2.destroyAllWindows()
plt.grid(False)
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1fd4200bd00>
# Sanity-check the detector on the first three images in the folder.
# Load the cascade ONCE — the original re-read the XML file from disk on
# every iteration — and break out once the three samples are processed
# instead of spinning through the remaining ~1088 files doing nothing.
haar_cascade = cv2.CascadeClassifier('Haarcascade_frontalface_default.xml')
x = 0
for images in os.listdir(train_dir):
    if x < 3:
        print(x)
        print(images)
        # Reading the image
        img = cv2.imread(train_dir + '/' + images)
        # Converting image to grayscale
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Applying the face detection method on the grayscale image
        faces_rect = haar_cascade.detectMultiScale(gray_img, 1.1, 5)
        print(faces_rect)
        print(faces_rect[0][0], faces_rect[0][1])
        print('no of images=', len(faces_rect))
        x = x + 1
    else:
        break
0 real_00001.jpg [[146 140 428 428]] 146 140 no of images= 1 1 real_00002.jpg [[ 80 140 436 436]] 80 140 no of images= 1 2 real_00003.jpg [[ 33 121 362 362]] 33 121 no of images= 1
lst = []
# Load the classifier once; the original reloaded the cascade XML from
# disk for every one of the ~1091 images.
haar_cascade = cv2.CascadeClassifier('Haarcascade_frontalface_default.xml')
for images in os.listdir(train_dir):  ## Loop to go through all the images
    # Reading the image
    img = cv2.imread(train_dir + '/' + images)
    # Converting image to grayscale
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Applying the face detection method on the grayscale image.
    # Metadata collection is done in the next section; this pass only
    # verifies that detection runs cleanly over the whole dataset.
    faces_rect = haar_cascade.detectMultiScale(gray_img, 1.1, 5)
Comments : The above code would loop through the entire data set and detect the faces
3. From the same loop above, extract metadata of the faces and write into a DataFrame. [3 Marks]
lst = []
### Creating Columns for Dataframe: bounding box of the first detected
### face, the total number of faces found, and the image file name.
cols = [ 'x','y','w','h','Total Faces', 'image Name']
faces_count = 0
no_faces_count = 0
# Load the cascade once, outside the loop — the original re-read the XML
# from disk for every image, which dominates the loop's runtime.
haar_cascade = cv2.CascadeClassifier('Haarcascade_frontalface_default.xml')
for images in os.listdir(train_dir):
    # Reading the image
    img = cv2.imread(train_dir + '/' + images)
    # Converting image to grayscale
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Applying the face detection method on the grayscale image
    faces_rect = haar_cascade.detectMultiScale(gray_img, 1.1, 5)
    if len(faces_rect) > 0:
        # Record the first face's box plus the total number of faces.
        x0, y0, w0, h0 = faces_rect[0]
        lst.append([x0, y0, w0, h0, len(faces_rect), images])
        faces_count = faces_count + 1
    else:
        # No detection: record an all-zero box so every image has a row.
        lst.append([0, 0, 0, 0, len(faces_rect), images])
        no_faces_count = no_faces_count + 1
print("No of images in which faces are identified", faces_count)
No of images in which faces are identified 907
# Report how many images produced no detection, then materialise the
# collected rows as a DataFrame (one row per image).
print("No of images in which faces are not identified", no_faces_count)
face_detect_df = pd.DataFrame(data=lst, columns=cols)
print(face_detect_df.shape)
No of images in which faces are not identified 184 (1091, 6)
# Reading the image
img = cv2.imread(train_dir+'/'+'real_00951.jpg')
plt.grid(False)
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1fd502df970>
# Reading the image
img = cv2.imread(train_dir+'/'+'real_00607.jpg')
plt.grid(False)
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1fd502797f0>
# Reading the image
img = cv2.imread(train_dir+'/'+'real_00692.jpg')
plt.grid(False)
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1fd502a0f40>
# Reading the image
img = cv2.imread(train_dir+'/'+'real_00970.jpg')
plt.grid(False)
plt.imshow(img)
<matplotlib.image.AxesImage at 0x1fd501db8b0>
4. Save the output Dataframe in .csv format. [2 Marks]
face_detect_df.shape
(1091, 6)
face_detect_df.head()
| x | y | w | h | Total Faces | image Name | |
|---|---|---|---|---|---|---|
| 0 | 146 | 140 | 428 | 428 | 1 | real_00001.jpg |
| 1 | 80 | 140 | 436 | 436 | 1 | real_00002.jpg |
| 2 | 33 | 121 | 362 | 362 | 1 | real_00003.jpg |
| 3 | 134 | 173 | 356 | 356 | 1 | real_00004.jpg |
| 4 | 0 | 0 | 0 | 0 | 0 | real_00005.jpg |
face_detect_df.tail()
| x | y | w | h | Total Faces | image Name | |
|---|---|---|---|---|---|---|
| 1086 | 93 | 167 | 402 | 402 | 1 | real_01077.jpg |
| 1087 | 65 | 76 | 468 | 468 | 1 | real_01078.jpg |
| 1088 | 69 | 67 | 475 | 475 | 1 | real_01079.jpg |
| 1089 | 51 | 87 | 447 | 447 | 1 | real_01080.jpg |
| 1090 | 0 | 0 | 0 | 0 | 0 | real_01081.jpg |
# Persist the face metadata (bounding boxes + counts per image) to CSV.
face_detect_df.to_csv("Face_detection_output_haarcascade_frontalface.csv", encoding='utf-8', index=True)
Comments: The face detected co-ordinates are saved into the csv file
Conclusion:
PART C - 30 Marks

1.Unzip, read and Load data(‘PINS.zip’) into session. [2 Marks]
## Extract the file from zip file
import zipfile
try:
    # Context manager guarantees the archive handle is closed even if
    # extractall() raises midway (the original only closed on success).
    with zipfile.ZipFile('PINS.zip', 'r') as zip_reference_pins:
        ## Extract to new folder called unzipped
        zip_reference_pins.extractall('pins_unzipped')  # unzip directory
except FileNotFoundError as e:
    print(e.errno, e.strerror, e.with_traceback)
    print(e.args)
    print(e)
2. Write function to create metadata of the image. [4 Marks]
This function accepts the base path, the identity name/ directory name and the individual file name as input and returns the complete path
class IdentityMetadata():
    """Metadata record locating one face image on disk.

    The full path is ``base/name/file``, where ``name`` is the identity
    (celebrity) directory and ``file`` is the image file name.
    """

    def __init__(self, base, name, file):
        self.base = base  # dataset root directory
        self.name = name  # identity (directory) name
        self.file = file  # image file name

    def __repr__(self):
        # Represent the record by the path it points at.
        return self.image_path()

    def image_path(self):
        """Return the complete path base/name/file for this image."""
        return os.path.join(self.base, self.name, self.file)
3. Write a loop to iterate through each and every image and create metadata for all the images. [4 Marks]
Note: This method loops through all the folders in the zipped location and creates the individual paths of the images in an array
def load_metadata(path, image_names):
    """Build IdentityMetadata records for every jpg/jpeg image under ``path``.

    Parameters
    ----------
    path : str
        Root directory; each sub-directory is one identity (celebrity).
    image_names : list
        Output parameter — every accepted file name is appended to it.

    Returns
    -------
    numpy.ndarray
        1-D array of IdentityMetadata objects, one per image.
    """
    # The first-iteration debug prints (`x` flag) from the original have
    # been removed; they only dumped the first file name and partial list.
    metadata = []
    for identity in os.listdir(path):
        for fname in os.listdir(os.path.join(path, identity)):
            # Check file extension. Allow only jpg/jpeg files.
            if os.path.splitext(fname)[1] in ('.jpg', '.jpeg'):
                metadata.append(IdentityMetadata(path, identity, fname))
                image_names.append(fname)
    return np.array(metadata)
import os
source_dir='pins_unzipped/PINS/'
len(os.listdir(source_dir))
100
Comments: The number of Directories in extracted zip file is 100
image_names=[]
# metadata = load_metadata('images')
metadata = load_metadata(source_dir,image_names)
Aaron Paul0_262.jpg [pins_unzipped/PINS/pins_Aaron Paul\Aaron Paul0_262.jpg]
image_names[0]
'Aaron Paul0_262.jpg'
len(image_names)
10770
print('metadata shape :', metadata.shape)
metadata shape : (10770,)
Comments: This data set contains totally 10770 records of data of all 100 celebrities and meta data is created for the dataset.
metadata[1500]
pins_unzipped/PINS/pins_bill gates\bill gates41.jpg
type(metadata[1500]), metadata[1500].image_path()
(__main__.IdentityMetadata, 'pins_unzipped/PINS/pins_bill gates\\bill gates41.jpg')
import cv2
def load_image(path):
    """Read the image at ``path`` and return it as an RGB numpy array.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file. cv2.imread returns None instead
        of raising, which previously surfaced as an opaque
        "'NoneType' object is not subscriptable" TypeError.
    """
    img = cv2.imread(path, 1)
    if img is None:
        raise FileNotFoundError(f'cv2.imread could not read image: {path}')
    # OpenCV loads images with color channels in BGR order,
    # so we reverse the last axis to get RGB.
    return img[..., ::-1]
load_image('pins_unzipped/PINS/pins_bill gates\\bill gates41.jpg')
array([[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
...,
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]],
[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]], dtype=uint8)
print('metadata shape :', metadata.shape)
metadata shape : (10770,)
Comments: This data set contains totally 10770 records of data of all 100 celebrities and meta data is created for the dataset.
4. Generate Embeddings vectors on the each face in the dataset. [4 Marks]
Hint: Use ‘vgg_face_weights.h5
### VGG face model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten, Activation
def vgg_face():
    """Build the VGG-Face architecture (13 conv layers + conv-as-FC head).

    Returns an uncompiled Sequential model whose layer sequence matches
    the published ``vgg_face_weights.h5`` checkpoint, so load_weights()
    can restore the pre-trained parameters.
    """
    model = Sequential()
    # First conv carries the fixed 224x224 RGB input shape.
    model.add(ZeroPadding2D((1, 1), input_shape=(224, 224, 3)))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    # Remaining VGG blocks as (conv-layer count, filter count); the first
    # 64-filter conv of block 1 was added above, hence (1, 64) here.
    # Each block ends with a 2x2 stride-2 max-pool.
    vgg_blocks = [(1, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
    for n_convs, filters in vgg_blocks:
        for _ in range(n_convs):
            model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    # Fully-connected layers expressed as convolutions, as in VGG-Face.
    model.add(Convolution2D(4096, (7, 7), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(4096, (1, 1), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation('softmax'))
    return model
model = vgg_face()
model.load_weights('vgg_face_weights.h5')
model.layers[0], model.layers[-2]
(<keras.layers.convolutional.ZeroPadding2D at 0x1fd5091d910>, <keras.layers.core.flatten.Flatten at 0x1fd5a651c40>)
from tensorflow.keras.models import Model
vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)
type(vgg_face_descriptor)
keras.engine.functional.Functional
vgg_face_descriptor.inputs, vgg_face_descriptor.outputs
([<KerasTensor: shape=(None, 224, 224, 3) dtype=float32 (created by layer 'zero_padding2d_13_input')>], [<KerasTensor: shape=(None, 2622) dtype=float32 (created by layer 'flatten_1')>])
# Get embedding vector for first image in the metadata using the pre-trained model
img_path = metadata[0].image_path()
img = load_image(img_path)
# Normalising pixel values from [0-255] to [0-1]: scale RGB values to interval [0,1]
img = (img / 255.).astype(np.float32)
img = cv2.resize(img, dsize = (224,224))
print(img.shape)
# Obtain embedding vector for an image
# Get the embedding vector for the above image using vgg_face_descriptor model and print the shape
embedding_vector = vgg_face_descriptor.predict(np.expand_dims(img, axis=0))[0]
print(embedding_vector.shape)
(224, 224, 3) (2622,)
embedding_vector[0], type(embedding_vector), type(embedding_vector[0])
(0.031703044, numpy.ndarray, numpy.float32)
embedding_vector[2], embedding_vector[98], embedding_vector[-2]
(-0.012434018, -0.008693828, 0.0021908146)
total_images = len(metadata)
print('total_images :', total_images)
total_images : 10770
# Compute a 2622-d VGG-Face embedding for every image in the dataset.
embeddings = np.zeros((metadata.shape[0], 2622))
for i, m in enumerate(metadata):
    # Load, scale pixel values to [0, 1], and resize to the 224x224 input.
    img = load_image(m.image_path())
    img = cv2.resize((img / 255.).astype(np.float32), dsize=(224, 224))
    # Add a batch axis, run the descriptor model, drop the batch axis.
    batch = np.expand_dims(img, axis=0)
    embeddings[i] = vgg_face_descriptor.predict(batch)[0]
print('embeddings shape :', embeddings.shape)
embeddings shape : (10770, 2622)
embeddings[0], embeddings[988], embeddings[988].shape
(array([ 0.03170304, -0.0150513 , -0.01243402, ..., 0.00043139,
0.00219081, -0.00908097]),
array([ 0.00793027, 0.00865317, -0.00304816, ..., -0.01746991,
0.02693006, 0.00946763]),
(2622,))
embeddings[8000]
array([-0.00379806, 0.00199546, -0.00214994, ..., -0.02244972,
-0.00273736, -0.00215481])
5. Build distance metrics for identifying the distance between two similar and dissimilar images. [4 Marks]
def distance(emb1, emb2):
    """Squared Euclidean (L2) distance between two embedding vectors."""
    diff = emb1 - emb2
    return np.sum(diff * diff)
def show_pair(idx1, idx2):
    """Show the two images at metadata indices *idx1* and *idx2* side by
    side, titled with the distance between their embeddings."""
    d = distance(embeddings[idx1], embeddings[idx2])
    plt.figure(figsize=(8,3))
    plt.suptitle(f'Distance between {idx1} & {idx2}= {d:.2f}')
    # Left panel = first index, right panel = second index.
    for position, index in ((121, idx1), (122, idx2)):
        plt.subplot(position)
        plt.grid(False)
        plt.imshow(load_image(metadata[index].image_path()))
# Two same-identity pairs followed by two cross-identity pairs.
for first, second in [(900, 901), (500, 501), (1407, 901), (1408, 501)]:
    show_pair(first, second)
The distance between embeddings of similar images is small, while the distance between dissimilar images is large.
# Hold out every 9th example as the test set; all remaining examples form
# the training set.
test_idx = np.arange(metadata.shape[0]) % 9 == 0
train_idx = ~test_idx

# Embedding features for the two splits.
X_train = embeddings[train_idx]
X_test = embeddings[test_idx]

# The identity name attached to each image serves as its label.
targets = np.array([m.name for m in metadata])
y_train = targets[train_idx]
y_test = targets[test_idx]

print('X_train shape : ({0},{1})'.format(X_train.shape[0], X_train.shape[1]))
print('y_train shape : ({0},)'.format(y_train.shape[0]))
print('X_test shape : ({0},{1})'.format(X_test.shape[0], X_test.shape[1]))
print('y_test shape : ({0},)'.format(y_test.shape[0]))
X_train shape : (9573,2622) y_train shape : (9573,) X_test shape : (1197,2622) y_test shape : (1197,)
# Sanity-check one label from each split.
y_test[0], y_train[988]
('pins_Aaron Paul', 'pins_barbara palvin face')
# Both splits contain all 100 identities.
len(np.unique(y_test)), len(np.unique(y_train))
(100, 100)
from sklearn.preprocessing import LabelEncoder
# Map each identity string to an integer class index (0..99) for the SVM.
le = LabelEncoder()
# Fit the mapping on the training labels and reuse it for the test labels,
# so both splits share the same class indices.
y_train_encoded = le.fit_transform(y_train)
print(le.classes_)
y_test_encoded = le.transform(y_test)
['pins_Aaron Paul' 'pins_Alvaro Morte' 'pins_Amanda Crew' 'pins_Amaury Nolasco' 'pins_Anna Gunn' 'pins_Benedict Cumberbatch' 'pins_Betsy Brandt' 'pins_Brenton Thwaites' 'pins_Brit Marling' 'pins_Bryan Cranston' 'pins_Caity Lotz' 'pins_Cameron Monaghan' 'pins_Chance Perdomo' 'pins_Chris Evans' 'pins_Chris Pratt' 'pins_Cobie Smulders' 'pins_Danielle Panabaker' 'pins_Dave Franco' 'pins_Dominic Purcell' 'pins_Dwayne Johnson' 'pins_Emilia Clarke' 'pins_Emily Bett Rickards' 'pins_Emma Stone' 'pins_Gwyneth Paltrow' 'pins_Henry Cavil' 'pins_Jason Momoa' 'pins_Jeremy Renner' 'pins_Jesse Eisenberg' 'pins_Jim Parsons' 'pins_Jon Bernthal' 'pins_Josh Radnor' 'pins_Kit Harington' 'pins_Krysten Ritter' 'pins_Kumail Nanjiani' 'pins_Maisie Williams' 'pins_Mark Ruffalo' 'pins_Martin Starr' 'pins_Melissa benoit' 'pins_Mike Colter' 'pins_Morena Baccarin' 'pins_Morgan Freeman' 'pins_Natalie Portman' 'pins_Neil Patrick Harris' 'pins_Paul Rudd' 'pins_Pedro Alonso' 'pins_Peter Dinklage' 'pins_RJ Mitte' 'pins_Rami Melek' 'pins_Robert Knepper' 'pins_Robin Taylor' 'pins_Ryan Reynolds' 'pins_Sarah Wayne Callies' 'pins_Scarlett Johansson' 'pins_Sebastian Stan' 'pins_Sophie Turner' 'pins_Stephen Amell' 'pins_Sundar Pichai' 'pins_Thomas Middleditch' 'pins_Tom Cavanagh' 'pins_Ursula Corbero' 'pins_Wentworth Miller' 'pins_Willa Holland' 'pins_William Fichtner' 'pins_alexandra daddario' 'pins_alycia debnam carey face' 'pins_amber heard face' 'pins_anne hathaway' 'pins_barbara palvin face' 'pins_bellamy blake face' 'pins_bill gates' 'pins_brie larson' 'pins_chadwick boseman face' 'pins_david mazouz' 'pins_drake' 'pins_dua lipa face' 'pins_eliza taylor' 'pins_elizabeth olsen face' 'pins_elon musk' 'pins_emma watson face' 'pins_gal gadot face' 'pins_grant gustin face' 'pins_jason isaacs' 'pins_jeff bezos' 'pins_kiernan shipka' 'pins_kristen stewart face' 'pins_lindsey morgan face' 'pins_margot robbie face' 'pins_maria pedraza' 'pins_mark zuckerberg' 'pins_miguel herran' 'pins_millie bobby brown' 
'pins_rihanna' 'pins_robert downey jr face' 'pins_sean pertwee' 'pins_selena gomez' 'pins_shakira' 'pins_tati gabrielle' 'pins_taylor swift' 'pins_tom holland face' 'pins_zendaya']
# Show the integer-encoded label arrays for both splits.
print('y_train_encoded : ', y_train_encoded)
print('y_test_encoded : ', y_test_encoded)
y_train_encoded : [ 0 0 0 ... 99 99 99] y_test_encoded : [ 0 0 0 ... 99 99 99]
## Standardize the embedding features with sklearn's StandardScaler.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same
# mean/variance transform to both splits (avoids test-set leakage).
scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

print('X_train_std shape : ({0},{1})'.format(X_train_std.shape[0], X_train_std.shape[1]))
print('y_train_encoded shape : ({0},)'.format(y_train_encoded.shape[0]))
print('X_test_std shape : ({0},{1})'.format(X_test_std.shape[0], X_test_std.shape[1]))
print('y_test_encoded shape : ({0},)'.format(y_test_encoded.shape[0]))
X_train_std shape : (9573,2622) y_train_encoded shape : (9573,) X_test_std shape : (1197,2622) y_test_encoded shape : (1197,)
6. Use PCA for dimensionality reduction. [2 Marks]
from sklearn.decomposition import PCA
# Reduce the 2622-dim standardized embeddings to 128 principal components;
# fit on the training split only, then project the test split with the
# same components.
pca = PCA(n_components=128)
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)
7. Build an SVM classifier in order to map each image to its right person. [4 Marks]
from sklearn.svm import SVC
# RBF-kernel SVM over the PCA-reduced embeddings; C=5 (soft-margin
# penalty) and gamma=0.001 appear to be hand-chosen hyperparameters.
clf = SVC(C=5., gamma=0.001)
clf.fit(X_train_pca, y_train_encoded)
SVC(C=5.0, gamma=0.001)
# Predict integer class indices for the test embeddings.
y_predict = clf.predict(X_test_pca)
# NOTE(review): despite its name, this variable holds the *decoded*
# string labels (inverse_transform maps indices back to identity names).
y_predict_encoded = le.inverse_transform(y_predict)
print('y_predict_encoded : ',y_predict_encoded)
y_predict_encoded : ['pins_Aaron Paul' 'pins_Aaron Paul' 'pins_Aaron Paul' ... 'pins_zendaya' 'pins_zendaya' 'pins_zendaya']
print('y_predict shape : ', y_predict.shape)
print('y_test_encoded shape : ', y_test_encoded.shape)
y_predict shape : (1197,) y_test_encoded shape : (1197,)
# Peek at a span of ground-truth labels around a class boundary.
y_test_encoded[32:49]
array([ 1, 1, 1, 1, 1, 1, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])
# Find the classification accuracy
# Overall test-set accuracy of the SVM classifier (~94.99%).
accuracy_score(y_test_encoded, y_predict)
0.949874686716792
# Display a sample test image together with the identity the SVM predicted.
example_idx = 501
# metadata[test_idx] selects the test subset; index into it for the sample.
example_image = load_image(metadata[test_idx][example_idx].image_path())
# Integer class index (unused below) and decoded identity name.
example_prediction = y_predict[example_idx]
example_identity = y_predict_encoded[example_idx]
plt.grid(False)
plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');
# A second sample test image with its predicted identity.
example_idx = 350
example_image = load_image(metadata[test_idx][example_idx].image_path())
example_prediction = y_predict[example_idx]
example_identity = y_predict_encoded[example_idx]
plt.grid(False)
plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');
8. Import and display the test images. [2 Marks]
Hint: ‘Benedict Cumberbatch9.jpg’ and ‘Dwayne Johnson4.jpg’ are the test images
# File names of the two held-out test images (expected in the working dir).
test_img1='Benedict Cumberbatch9.jpg'
test_img2='Dwayne Johnson4.jpg'
def displayImage(image_name):
    """Read the image at *image_name* from disk and display it, using the
    file name as the plot title.

    Bug fix: cv2.imread returns channels in BGR order, but plt.imshow
    interprets arrays as RGB — convert so colours render correctly.
    """
    img = cv2.imread(image_name)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.grid(False)
    plt.imshow(img)
    plt.title(image_name)
# Preview both test images before running predictions on them.
displayImage(test_img1)
displayImage(test_img2)
9. Use the trained SVM model to predict the face on both test images. [4 Marks]
def getEmbedding(image):
    """Return the 2622-dim VGG-Face embedding for the image file *image*."""
    face = load_image(image)
    # Scale pixels to [0, 1] and resize to the network's 224x224 input.
    face = cv2.resize((face / 255.).astype(np.float32), dsize = (224,224))
    # Add a batch axis for predict(), then take the single embedding row.
    return vgg_face_descriptor.predict(np.expand_dims(face, axis=0))[0]
def displayPrediction(image_name,title):
    """Display the image at *image_name* with *title* (the predicted
    identity) as the plot title.

    Bug fix: cv2.imread returns channels in BGR order, but plt.imshow
    interprets arrays as RGB — convert so colours render correctly.
    """
    img = cv2.imread(image_name)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.grid(False)
    plt.imshow(img)
    plt.title(title)
def predictImage(image):
    """Predict the identity in *image* by running the full pipeline
    (VGG-Face embedding -> standard scaling -> PCA -> SVM), display the
    image with the predicted name, and return that name."""
    # Wrap the embedding in a list: scaler/pca/clf all expect a 2-D batch.
    features = pca.transform(scaler.transform([getEmbedding(image)]))
    # Decode the SVM's integer class back to the identity string.
    predicted = le.inverse_transform(clf.predict(features))[0]
    displayPrediction(image, predicted)
    return predicted
# Run the full pipeline on the first held-out test image.
predict_1=predictImage(test_img1)
print("Predicted face is ",predict_1)
Predicted face is pins_Benedict Cumberbatch
# And on the second held-out test image.
predict_2=predictImage(test_img2)
print("Predicted face is ",predict_2)
Predicted face is pins_Dwayne Johnson
Conclusion: